I'm going to overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and I want to test that the training worked.
In [1]:
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [2]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [3]:
training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5'
#training_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsat.hdf5'
test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'
#test_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsatTest.hdf5'
em_method = 'gp'
split_method = 'random'
In [4]:
a = 1.0
z = 1.0/a - 1.0
In [5]:
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
In [6]:
v = np.loadtxt('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_result_xigm.npy')
In [7]:
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']
In [8]:
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)
In [9]:
from collections import defaultdict
metric = defaultdict(list)
for val, pname in zip(v, pnames):
    metric[pname].append(val)
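Quick sanity check I'm adding here (not part of the original flow): given how pnames was built, each parameter should have picked up two metric values (one per kernel) and 'bias' just one, assuming v has the matching length.
In [ ]:
# Hedged sanity check: pnames = ['bias', 'amp', <11 params>, 'amp', <11 params>],
# so every parameter should map to two metric entries and 'bias' to one.
for pname in sorted(metric):
    print('%s: %d' % (pname, len(metric[pname])))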
In [10]:
np.random.seed(0)
emu = SpicyBuffalo(training_file, method=em_method, hyperparams={'metric': metric},
                   fixed_params=fixed_params, custom_mean_function='linear',
                   downsample_factor=0.1)
In [11]:
emu.scale_bin_centers
Out[11]:
In [12]:
rbins = np.logspace(-1.1, 1.6, 19)
print (rbins[1:] + rbins[:-1])/2  # arithmetic bin centers
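An aside I'm adding: for log-spaced bins the geometric mean is arguably the more natural center, so here's the log-space version to compare against emu.scale_bin_centers.
In [ ]:
# Hedged aside: geometric (log-space) bin centers, for comparison with
# the arithmetic centers printed above and with emu.scale_bin_centers.
print(np.sqrt(rbins[1:]*rbins[:-1]))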
In [13]:
params = {}
for pname in emu.get_param_names():
    if pname == 'r':
        continue
    low, high = emu.get_param_bounds(pname)
    params[pname] = np.random.uniform(low, high)
print params
In [14]:
pred_y = emu.emulate_wrt_r(params)[0]
print pred_y
In [15]:
plt.plot(emu.scale_bin_centers, pred_y)
plt.xscale('log')
In [16]:
gof = emu.goodness_of_fit(test_file, statistic = 'frac')
#print gof.mean(), np.median(gof)
for g in gof:
    print g.mean(), np.median(g)
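To see where the emulator struggles, here's a quick sketch (mine, assuming gof is a list of per-bin arrays, as the loop above implies) of the fractional error as a function of scale.
In [ ]:
# Hedged sketch: mean and median fractional error per radial bin.
plt.plot(emu.scale_bin_centers, [g.mean() for g in gof], label='mean')
plt.plot(emu.scale_bin_centers, [np.median(g) for g in gof], label='median')
plt.xscale('log')
plt.legend(loc='best')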
In [17]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [18]:
test_x, _, _, _ = emu.get_data(test_file, fixed_params=fixed_params)
train_x, _, _, _ = emu.get_data(training_file, fixed_params=fixed_params)
emu.get_param_names()
Out[18]:
In [19]:
print test_x.shape, train_x.shape
print test_x.shape[0]/(7*emu.n_bins), train_x.shape[0]/(40*emu.n_bins)
In [20]:
idx1, idx2 = -5, -2
plt.scatter(test_x[0:1000*emu.n_bins:emu.n_bins, idx1], test_x[0:1000*emu.n_bins:emu.n_bins, idx2], alpha = 0.5)
plt.scatter(train_x[0:1000*emu.n_bins:emu.n_bins, idx1], train_x[0:1000*emu.n_bins:emu.n_bins, idx2], color = 'g', alpha = 0.5)
Out[20]:
In [21]:
resmat = np.zeros( (len(pred_y), pred_y[0].shape[0]))
for bin_no in xrange(len(pred_y)):
    py, dy = pred_y[bin_no], data_y[bin_no]
    resmat[bin_no] = 10**py - 10**dy
    #print np.sqrt(np.mean( (py - dy)**2)*py.shape[0]/(py.shape[0]-1) )
In [22]:
scovmat = resmat.dot(resmat.T)/(len(pred_y[0])-1)
In [23]:
print np.sqrt(np.diag(scovmat))
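The diagonal is the per-bin scatter; the off-diagonal structure is easier to read as a correlation matrix. A quick sketch of mine, derived from scovmat above:
In [ ]:
# Hedged sketch: normalize the sample covariance into a correlation matrix.
d = np.sqrt(np.diag(scovmat))
corrmat = scovmat/np.outer(d, d)
plt.imshow(corrmat, cmap='RdBu_r', vmin=-1, vmax=1)
plt.colorbar()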
In [52]:
%%bash
cp /home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/default_metrics.pkl /home/users/swmclau2/Git/pearce/emulator/default_metrics.pkl
In [53]:
#print g.mean(), np.median(g)
In [54]:
plt.hist(data_y)
In [ ]:
plt.hist(pred_y)
In [ ]:
plt.hist(emu.y)
In [ ]:
plt.hist(np.log10(gof));
In [ ]:
plt.hist(emu.y)
In [ ]:
for i in xrange(50):
    params = {}
    for pname in emu.get_param_names():
        if pname == 'r':
            continue
        low, high = emu.get_param_bounds(pname)
        params[pname] = np.random.uniform(low, high)
    pred_y = emu.emulate(params)[0]
    print pred_y
    #print params
In [ ]:
for i, (g, r) in enumerate(zip(gof, emu.scale_bin_centers)):
    print r, g.mean(), np.median(g)
    #plt.hist(np.log10(g))
    #plt.show()
In [ ]:
n_cosmo_params = 7
loo_cosmo = emu.x[0, 0, :n_cosmo_params]
loo_cosmo_idxs = np.all(emu.x[:, :, :n_cosmo_params] == loo_cosmo, axis=2)
train_x, train_y, train_yerr = emu.x[~loo_cosmo_idxs, :], emu.y[~loo_cosmo_idxs], emu.yerr[~loo_cosmo_idxs]
test_x, test_y, test_yerr = emu.x[loo_cosmo_idxs, :], emu.y[loo_cosmo_idxs], emu.yerr[loo_cosmo_idxs]
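Adding a quick shape check on the leave-one-cosmology-out split (mine; it assumes the boolean mask partitions emu.y cleanly).
In [ ]:
# Hedged sanity check: the held-out cosmology plus the training remainder
# should account for every point in emu.y.
print(train_x.shape)
print(test_x.shape)
print(train_y.size + test_y.size == emu.y.size)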
In [ ]:
model = emu._emulator
model.compute(train_x, train_yerr)
In [ ]:
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean
In [ ]:
np.mean(np.abs((pred_y-test_y)/test_y))
#np.mean(np.abs((pred_y-train_y)/train_y))
In [ ]:
# NOTE: 'ypred' here presumably holds predictions for the full emu.x from an
# earlier (since-deleted) cell; pred_y above covers only the held-out cosmology.
resids = np.abs(emu.y*emu._y_std + emu._y_mean - ypred)
In [ ]:
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))
In [ ]:
ypred.mean(), emu._y_mean
In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()
In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()
In [ ]:
plt.hist(np.log10(test_gof));
In [ ]:
test_x
In [ ]:
(emu.x*emu._x_std) + emu._x_mean
In [ ]:
emu.get_param_names()
In [ ]:
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)
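Round-trip check I'm adding on the whitening; because of the 1e-5 floor in the denominator, the inverse only recovers test_x up to that regularization.
In [ ]:
# Hedged check: un-whitening should recover the original test points
# up to the 1e-5 floor in the denominator.
x_back = test_x_white*(emu._x_std + 1e-5) + emu._x_mean
print(np.abs(x_back - test_x).max())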
In [ ]:
model = emu._emulator
In [ ]:
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)
In [ ]:
pred_y = pred_y_white*emu._y_std + emu._y_mean
In [ ]:
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')
plt.legend(loc = 'best')
In [ ]:
test_y.mean(), emu._y_mean, pred_y.mean()
In [ ]:
test_y.std(), emu._y_std, pred_y.std()
In [ ]:
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')